#This code allows one to replicate our estimation of sigma.

#This will clear all objects includes hidden objects
rm(list = ls(all.names = TRUE))
gc()

#Load the libraries
library(zoo)
library(stringr)
library(lubridate)
library(devtools)
library(Hmisc)
library(lfe)
library(scales)
library(timeDate)
library(data.table)
library(tikzDevice)
library(stringr)
library(fixest)
library(stargazer)
library(alpaca) #This package allows us to calculate clustered standard errors easily in non-linear models
library(sqldf) #USe to clean the 2019 data
library(dplyr)
library(AER)
library(estimatr)

#Set paths
Main_path <- "XXX"

#First I set the paths that I will use.
Data_path <- paste0(Main_path, "data/")
Market_sentiment_path <- paste0(Main_path, "market_sentiment/")

#Load in the data.
WRDS_sum_stats <- fread(paste0(Data_path, "wrds_summary_statistics/logit_estimation_data/SumStats_combine_TRACE_standard_EOD_Jan_June.csv"),
                        select = c("DATE", "ph", "pl", "xl"))

MS_data <- fread(paste0(Market_sentiment_path,"merged_market_sentiment_data.csv"))
MS_data <- MS_data[All_All != "",.(date, var, All_All, All_DBC,  All_DSC, All_AB, All_AS)]
MS_data[, `:=`(Customer = as.numeric(sub(",", "", All_DSC, fixed = TRUE)) + as.numeric(sub(",", "", All_DBC, fixed = TRUE)) + as.numeric(sub(",", "", All_AB, fixed = TRUE)) +
                 as.numeric(sub(",", "", All_AS, fixed = TRUE)),
               
               All =  as.numeric(sub(",", "", All_All, fixed = TRUE) ))]
MS_data[, date := as.IDate(date)]


MS_data <- dcast(MS_data, formula = date ~ var, value.var =  c("All", "Customer"))
#MS_data[, `:=`(sec = as.numeric(sub(",", "", sec, fixed = TRUE)), tran = as.numeric(sub(",", "", tran, fixed = TRUE)), vol = as.numeric(sub(",", "", vol, fixed = TRUE)) ) ]
MS_data[, `:=`(Week_day = wday(date), Month = month(date), Year = year(date))]
MS_data <- MS_data[Week_day != 1][Week_day != 7]
MS_data <- MS_data[!(date %in%  as.IDate(c("2020-01-02", "2020-04-09", "2020-05-22", "2020-07-02", "2020-10-12", "2020-11-11", "2020-11-26", "2020-11-27", "2020-12-24", "2020-12-26", "2020-12-31",
                                           "2019-01-02", "2019-04-18", "2019-05-24", "2019-07-03", "2019-07-05", "2019-10-14", "2019-11-11", "2019-11-29", "2019-12-24", "2019-12-26", "2019-12-31",
                                           "2018-01-02", "2018-03-29", "2018-05-25", "2018-07-03", "2018-07-05", "2018-10-08", "2018-11-12", "2018-11-23", "2018-12-24", "2018-12-26", "2018-12-31",
                                           "2017-01-02", "2017-04-13", "2017-05-26", "2017-07-03", "2017-07-05", "2017-10-09", "2017-11-10", "2017-11-24", "2017-12-24", "2017-12-26", "2017-12-31",
                                           "2016-01-02", "2016-03-24", "2016-05-27", "2016-07-03", "2016-07-05", "2016-10-10", "2016-11-11", "2016-11-25", "2016-12-24", "2016-12-26", "2016-12-31" )))]

MS_data <- MS_data[date > as.IDate("2017-01-01")]

x <- data.table(date = as.IDate(as.character(holidayNYSE(2016:2020))))
MS_data <- MS_data[!x, on = "date"]
MS_data[, `:=`(Week_day = wday(date), Month = month(date), Year = year(date), Week = week(date))]

#USE week dummies to deseasonalize.
reg <- felm(log(Customer_vol) ~ factor(Week) | Week_day, MS_data)
MS_data[, Deseasonalized_ln_vol_C2 := reg$residuals]


reg <- felm(log(Customer_tran) ~ factor(Week) | Week_day, MS_data)
MS_data[, Deseasonalized_ln_trans_C2 := reg$residuals]


setnames(MS_data, "date", "trd_exctn_dt_s")
Aggregate_data <- WRDS_sum_stats[,.(trd_exctn_dt_s  = DATE, xl, ph, pl)][MS_data, on = "trd_exctn_dt_s"]

#Add in our policy dates.
Aggregate_data[, `:=`(dum_crisis = fifelse(trd_exctn_dt_s <= "2020-03-23" & trd_exctn_dt_s >= "2020-03-05", 1, 0), 
                      dum_SMCCF = fifelse(trd_exctn_dt_s > "2020-03-23"& trd_exctn_dt_s < "2020-04-10" , 1, 0),
                      dum_SMCCF_expansion = fifelse(trd_exctn_dt_s >  "2020-04-10" , 1, 0))]

Aggregate_data <- Aggregate_data[trd_exctn_dt_s < "2020-08-13"]


#This funcion mutliplies the coeffieicnts by -1 to turn -sigma into sigma
Inverse_demand_mod <- function(Model_delta){
  
  k1 = length(Model_delta$coefficients)
  
  Type <- fifelse(is.null(Model_delta$vcv), "IV", "OLS")
  
  if(Type == "IV"){
    Entry <- which((names(Model_delta$coefficients) %in% c("LN")))[1]
  } else {
    Entry <- which((rownames(Model_delta$coefficients) %in% c("LN")))[1]
  }
  
  #Here we have the function that we transform the coefficients.
  Trans_fun <- function(x){
    x[Entry] <- -x[Entry]
    return(x)
  }
  
  #Transform the coefficients
  Coeff <- Trans_fun(Model_delta$coefficients)
  Model_delta$coefficients <- Coeff
  
  if(Type == "OLS"){
    Model_delta$beta[Entry] <- Coeff[Entry]
    
    
  }
  
  return(Model_delta)
  
}



#Table 5
Regdata <- Aggregate_data[Year == 2020 &  (trd_exctn_dt_s < "2020-03-01" | trd_exctn_dt_s >= "2020-04-15" & trd_exctn_dt_s <  "2020-08-01")]
Regdata[, LN := log((1-xl)/xl) ]
Regdata[, Wtd_price_diff := ph - pl ]

#Table 5
IV_reg_volc <- ivreg( Wtd_price_diff ~ LN  + dum_SMCCF_expansion | Deseasonalized_ln_vol_C2   + dum_SMCCF_expansion,  data = Regdata)
IV_reg_Numc <- ivreg( Wtd_price_diff ~ LN  + dum_SMCCF_expansion | Deseasonalized_ln_trans_C2   + dum_SMCCF_expansion,  data = Regdata)

#Appendix table 8
Over_Identified_c <- ivreg( Wtd_price_diff ~ LN  + dum_SMCCF_expansion | Deseasonalized_ln_vol_C2 + Deseasonalized_ln_trans_C2 + dum_SMCCF_expansion,  data = Regdata)

#Also want robust standard errors.
IV_reg_volc_rob <- iv_robust( Wtd_price_diff ~ LN  + dum_SMCCF_expansion | Deseasonalized_ln_vol_C2   + dum_SMCCF_expansion,  data = Regdata)
IV_reg_Numc_rob <- iv_robust( Wtd_price_diff ~ LN  + dum_SMCCF_expansion | Deseasonalized_ln_trans_C2   + dum_SMCCF_expansion,  data = Regdata)
Over_Identified_c_rob <- iv_robust( Wtd_price_diff ~ LN  + dum_SMCCF_expansion | Deseasonalized_ln_vol_C2 + Deseasonalized_ln_trans_C2 + dum_SMCCF_expansion,  data = Regdata)

#Get the SE.
SE_OLS <- pmax(se(OLS_regc), se(OLS_regc, robust = T))
SE_volc <- pmax(se(IV_reg_volc), IV_reg_volc_rob$std.error)
SE_Numc <- pmax(se(IV_reg_Numc), IV_reg_Numc_rob$std.error)
SE_Overc <- pmax(se(Over_Identified_c), Over_Identified_c_rob$std.error)

#Get the diagnostics.
summary(IV_reg_volc, diagnostics = T)
summary(IV_reg_Numc, diagnostics = T)
summary(Over_Identified_c, diagnostics = T)

#Modify the coefficients
IV_reg_volc <- Inverse_demand_mod(IV_reg_volc)
IV_reg_Numc <- Inverse_demand_mod(IV_reg_Numc)
Over_Identified_c <- Inverse_demand_mod(Over_Identified_c)

#Now lets export all as a single table for sigma 
stargazer(IV_reg_volc, IV_reg_Numc, Over_Identified_c,  
          covariate.labels = c( "$\\log(x_h/x_l)$", "Post-crisis"),
          se = list(SE_OLS, SE_volc, SE_Numc, SE_Overc),
          
          #column.labels = c("$\\log(x_h/x_l)$"),# "All trades", "Customer trades"),
          dep.var.labels = c( "$p_h-p_l$"),
          column.labels   = c("OLS", "IV-volume", "IV-\\# trades", "2SLS"),
          float = FALSE, digits = 2, omit.stat=c("LL","ser","f"),
          model.names = FALSE, 
          type = "text")

#Appendix table 9.
#First stage for testing the instrument.
first_stage_wtd <- felm(LN ~ Deseasonalized_ln_vol_C2 + dum_SMCCF_expansion, Regdata)
first_stage_wtd_N <- felm(LN ~ Deseasonalized_ln_trans_C2 + dum_SMCCF_expansion, Regdata)

#Get the max SEs
SE_first_vol <- pmax(se(first_stage_wtd), se(first_stage_wtd, robust = T))
SE_first_N <- pmax(se(first_stage_wtd_N), se(first_stage_wtd_N, robust = T))


stargazer(first_stage_wtd, first_stage_wtd_N,
          dep.var.labels = c(  "$\\log(x_h/x_l)$"),
          se = list(SE_first_vol, SE_first_N),
          covariate.labels = c("log(Volume)", "log(Number of trades)", "Post-crisis"),
          float = FALSE, digits = 2, omit.stat=c("LL","ser"),
          type = "text")

#Internet appendix  table 6. Full sample results
Regdata_full_sample <- Aggregate_data[Year == 2020 ]
Regdata_full_sample[, LN := log((1-xl)/xl) ]
Regdata_full_sample[, Wtd_price_diff := ph - pl ]


IV_reg_volc <- ivreg( Wtd_price_diff ~ LN  + dum_crisis + dum_SMCCF + dum_SMCCF_expansion | Deseasonalized_ln_vol_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion,  data = Regdata_full_sample)
IV_reg_Numc <- ivreg( Wtd_price_diff ~ LN  + dum_crisis + dum_SMCCF + dum_SMCCF_expansion | Deseasonalized_ln_trans_C2   + dum_crisis + dum_SMCCF + dum_SMCCF_expansion,  data = Regdata_full_sample)
Over_Identified_c <- ivreg( Wtd_price_diff ~ LN  + dum_crisis + dum_SMCCF + dum_SMCCF_expansion | Deseasonalized_ln_vol_C2 + Deseasonalized_ln_trans_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion,  data = Regdata_full_sample)


IV_reg_volc_rob <- iv_robust( Wtd_price_diff ~ LN  + dum_crisis + dum_SMCCF + dum_SMCCF_expansion | Deseasonalized_ln_vol_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion,  data = Regdata_full_sample)
IV_reg_Numc_rob <- iv_robust( Wtd_price_diff ~ LN  + dum_crisis + dum_SMCCF + dum_SMCCF_expansion | Deseasonalized_ln_trans_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion,  data = Regdata_full_sample)
Over_Identified_c_rob <- iv_robust( Wtd_price_diff ~ LN  + dum_crisis + dum_SMCCF + dum_SMCCF_expansion | Deseasonalized_ln_vol_C2 + Deseasonalized_ln_trans_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion,  data = Regdata_full_sample)

#Get the SE.
SE_volc <- pmax(se(IV_reg_volc), IV_reg_volc_rob$std.error)
SE_Numc <- pmax(se(IV_reg_Numc), IV_reg_Numc_rob$std.error)
SE_Overc <- pmax(se(Over_Identified_c), Over_Identified_c_rob$std.error)

#Get the diagnostics.
summary(IV_reg_volc, diagnostics = T)
summary(IV_reg_Numc, diagnostics = T)
summary(Over_Identified_c, diagnostics = T)


#First stage for testing the instrument.
first_stage_wtd <- felm(LN ~ Deseasonalized_ln_vol_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion, Regdata_full_sample)
first_stage_wtd_N <- felm(LN ~ Deseasonalized_ln_trans_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion, Regdata_full_sample)

#Get the max SEs
SE_first_vol <- pmax(se(first_stage_wtd), se(first_stage_wtd, robust = T))
SE_first_N <- pmax(se(first_stage_wtd_N), se(first_stage_wtd_N, robust = T))

#Modify the coefficients
IV_reg_volc <- Inverse_demand_mod(IV_reg_volc)
IV_reg_Numc <- Inverse_demand_mod(IV_reg_Numc)
Over_Identified_c <- Inverse_demand_mod(Over_Identified_c)


#Now lets export all as a single table for sigma 
stargazer(IV_reg_volc, IV_reg_Numc, Over_Identified_c,  
          covariate.labels = c( "$\\log(x_h/x_l)$","Crisis", "SMCCF", "SMCCF expansion"),
          se = list( SE_volc, SE_Numc, SE_Overc),
          
          dep.var.labels = c( "$p_h-p_l$"),
          column.labels   = c("OLS", "IV-volume", "IV-\\# trades", "2SLS"),
          float = FALSE, digits = 2, omit.stat=c("LL","ser","f"),
          model.names = FALSE, 
          type = "text")

#Internet appendix table 7.
#First stage for testing the instrument.
first_stage_wtd <- felm(LN ~ Deseasonalized_ln_vol_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion, Regdata_full_sample)
first_stage_wtd_N <- felm(LN ~ Deseasonalized_ln_trans_C2 + dum_crisis + dum_SMCCF + dum_SMCCF_expansion, Regdata_full_sample)

#Get the max SEs
SE_first_vol <- pmax(se(first_stage_wtd), se(first_stage_wtd, robust = T))
SE_first_N <- pmax(se(first_stage_wtd_N), se(first_stage_wtd_N, robust = T))


#First stage regressions.
#Now lets export all as a single table for first stage results
stargazer(first_stage_wtd, first_stage_wtd_N,
          dep.var.labels = c(  "$\\log(x_h/x_l)$"),
          covariate.labels = c("log(Volume)", "log(Number of trades)","Crisis", "SMCCF", "SMCCF expansion"),
          float = FALSE, digits = 2, omit.stat=c("LL","ser"),
          se = list(SE_first_vol, SE_first_N),
          type = "text")


